library(dplyr)
# For Model fitting
library(lme4)
library(purrr)
# For diagnostics
library(performance)
# For adding new columns
library(tibble)
library(caret)
# Load data
sys.source("./codes/scripts/code_join_data_full_dataset.R", envir = knitr::knit_global())
# Load functions

## Models
sys.source("./codes/functions/functions_models.R", envir = knitr::knit_global())

Select performance and traits variables

Transform variables to the dataset

Backtranformation info

Back transformation

Even though you’ve done a statistical test on a transformed variable, such as the log of fish abundance, it is not a good idea to report your means, standard errors, etc. in transformed units. A graph that showed that the mean of the log of fish per 75 meters of stream was 1.044 would not be very informative for someone who can’t do fractional exponents in their head. Instead, you should back-transform your results. This involves doing the opposite of the mathematical function you used in the data transformation. For the log transformation, you would back-transform by raising 10 to the power of your number. For example, the log transformed data above has a mean of 1.044 and a 95% confidence interval of ±0.344 log-transformed fish. The back-transformed mean would be 101.044=11.1 fish. The upper confidence limit would be 10(1.044+0.344)=24.4 fish, and the lower confidence limit would be 10(1.044-0.344)=5.0 fish. Note that the confidence interval is not symmetrical; the upper limit is 13.3 fish above the mean, while the lower limit is 6.1 fish below the mean. Also note that you can’t just back-transform the confidence interval and add or subtract that from the back-transformed mean; you can’t take 100.344 and add or subtract that.

Square-root transformation. This consists of taking the square root of each observation. The back transformation is to square the number. If you have negative numbers, you can’t take the square root; you should add a constant to each number to make them all positive.

People often use the square-root transformation when the variable is a count of something, such as bacterial colonies per petri dish, blood cells going through a capillary per minute, mutations per generation, etc.

# Box-Cox transformation was previously run 

data_for_models_transformed <- 
    data_for_models %>% 
        
        mutate(
            # Plant's performance
            total_biomass_sqrt = sqrt(total_biomass),
            above_biomass_sqrt = sqrt(above_biomass),
            below_biomass_log = log(below_biomass),
            agr_log = log(agr),
            
            # NO TRANSFORMATION variable already in log-log
            rgr = rgr,
            
            # NO TRANSFORMATION variable already in log-log
            rgr_slope = rgr_slope,
          
            # Traits
            amax_log = log(amax),
            gs_sqrt = sqrt(gs),
            wue_log = log(wue),
            
            # d13 and d15 where not transformed because the data has negative values
            pnue_log = log(data_for_models$pnue),
          
            # Covariate
            init_height = log(init_height)) %>%   
            
        # Remove original variables (non-transformed)
        dplyr::select(spcode, treatment, nfixer, init_height, everything(),
               -c(5:8,11:13,16))    

Models: Questions 1 and 2

\[response\sim treatment*fixer\ + initial\ height + random( 1|\ specie)\]

# Take response variables' names 
response_vars_q1_q2 <- 
    set_names(names(data_for_models_transformed)[5:(ncol(data_for_models_transformed))])
#data_for_models_transformed[data_for_models_transformed$id == 48,]
models_list_q1_q2 <- map(response_vars_q1_q2, ~ mixed_model_1(response = .x, 
                                                              data = data_for_models_transformed))

Models Nodule count

# Load data
# This step was done like this because I am working with a subset of the data
# source cleaned data
source("./codes/scripts/code_clean_data_nodules.R")

# Delete unused variables
data_nodules_cleaned <-
    data_nodules_cleaned %>%
        
        # add id to rownames for keep track of the rows
        column_to_rownames("id") %>% 
        dplyr::select(spcode,treatment, everything())
# Boxcox for nodule count
BoxCoxTrans(data_nodules_cleaned$number_of_root_nodulation) 
Box-Cox Transformation

48 data points used to estimate Lambda

Input data summary:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   7.00   26.75   49.50   65.40  104.50  167.00 

Largest/Smallest: 23.9 
Sample Skewness: 0.689 

Estimated Lambda: 0.1 
With fudge factor, Lambda = 0 will be used for transformations
data_nodules_cleaned
    spcode                   treatment number_of_root_nodulation
5       ec                 ambientrain                        25
6       ec                 ambientrain                        30
7       ec                 ambientrain                         7
8       ec                 ambientrain                        35
9       ec ambientrain_water_nutrients                        11
10      ec ambientrain_water_nutrients                        49
11      ec ambientrain_water_nutrients                        29
12      ec ambientrain_water_nutrients                        57
13      ec           ambientrain_water                        48
14      ec           ambientrain_water                        17
15      ec           ambientrain_water                        55
16      ec           ambientrain_water                        27
17      ec       ambientrain_nutrients                        29
18      ec       ambientrain_nutrients                        30
19      ec       ambientrain_nutrients                        22
20      ec       ambientrain_nutrients                        26
105     gs                 ambientrain                        20
106     gs                 ambientrain                        27
107     gs                 ambientrain                        62
108     gs                 ambientrain                        26
109     gs ambientrain_water_nutrients                        26
110     gs ambientrain_water_nutrients                        72
111     gs ambientrain_water_nutrients                        58
112     gs ambientrain_water_nutrients                        62
113     gs           ambientrain_water                        16
114     gs           ambientrain_water                        29
115     gs           ambientrain_water                        18
116     gs           ambientrain_water                        18
117     gs       ambientrain_nutrients                        42
118     gs       ambientrain_nutrients                        69
119     gs       ambientrain_nutrients                        41
120     gs       ambientrain_nutrients                        50
125     dr                 ambientrain                       141
126     dr                 ambientrain                       133
127     dr                 ambientrain                       113
128     dr                 ambientrain                       142
129     dr ambientrain_water_nutrients                       165
130     dr ambientrain_water_nutrients                       129
131     dr ambientrain_water_nutrients                       163
132     dr ambientrain_water_nutrients                       167
133     dr           ambientrain_water                       104
134     dr           ambientrain_water                       114
135     dr           ambientrain_water                       112
136     dr           ambientrain_water                       106
137     dr       ambientrain_nutrients                       104
138     dr       ambientrain_nutrients                        94
139     dr       ambientrain_nutrients                       116
140     dr       ambientrain_nutrients                       103
    nodule_count_lab nodule_mass_in_the_lab average_nodule_weight
5                  7                  0.011           0.001571429
6                  5                  0.052           0.010400000
7                  4                  0.023           0.005750000
8                  8                  0.021           0.002625000
9                  3                  0.047           0.015666667
10                14                  0.042           0.003000000
11                14                  0.068           0.004857143
12                 9                  0.055           0.006111111
13                14                  0.036           0.002571429
14                 4                  0.043           0.010750000
15                 9                  0.038           0.004222222
16                10                  0.024           0.002400000
17                10                  0.114           0.011400000
18                 9                  0.146           0.016222222
19                 6                  0.022           0.003666667
20                 5                  0.073           0.014600000
105                8                  0.016           0.002000000
106               13                  0.027           0.002076923
107               11                  0.008           0.000727273
108                7                  0.017           0.002428571
109               10                  0.022           0.002200000
110                7                  0.015           0.002142857
111                9                  0.028           0.003111111
112                9                  0.026           0.002888889
113                6                  0.025           0.004166667
114                8                  0.027           0.003375000
115                6                  0.032           0.005333333
116                8                  0.015           0.001875000
117                9                  0.041           0.004555556
118                9                  0.012           0.001333333
119               11                  0.027           0.002454545
120               10                  0.008           0.000800000
125               16                  0.012           0.000750000
126               12                  0.013           0.001083333
127               18                  0.031           0.001722222
128               12                  0.014           0.001166667
129               18                  0.018           0.001000000
130               14                  0.010           0.000714286
131               17                  0.020           0.001176471
132               17                  0.015           0.000882353
133               11                  0.013           0.001181818
134               15                  0.010           0.000666667
135               13                  0.022           0.001692308
136                9                  0.008           0.000888889
137               16                  0.012           0.000750000
138               10                  0.018           0.001800000
139               11                  0.019           0.001727273
140               13                  0.026           0.002000000
    estimated_nodule_mass_per_plant init_height
5                             0.039        17.0
6                             0.312        25.0
7                             0.040        14.0
8                             0.092        16.0
9                             0.172        20.5
10                            0.147        21.0
11                            0.141        24.0
12                            0.348        25.5
13                            0.123        20.0
14                            0.183        20.0
15                            0.232        23.0
16                            0.065        24.5
17                            0.331        24.5
18                            0.487        13.5
19                            0.081        17.0
20                            0.380        22.0
105                           0.040        12.5
106                           0.056        13.0
107                           0.045        20.0
108                           0.063        19.0
109                           0.057        17.5
110                           0.154        17.5
111                           0.180        16.5
112                           0.179        14.5
113                           0.067        15.5
114                           0.098        16.0
115                           0.096        15.5
116                           0.034        10.5
117                           0.191        12.5
118                           0.092        17.5
119                           0.101        13.5
120                           0.040        21.0
125                           0.106         9.5
126                           0.144        11.5
127                           0.195        11.0
128                           0.166        11.0
129                           0.165        11.0
130                           0.092         8.5
131                           0.192        10.0
132                           0.147        12.5
133                           0.123        10.0
134                           0.076         7.0
135                           0.190         9.0
136                           0.094         9.5
137                           0.078        10.5
138                           0.169         8.5
139                           0.200        11.0
140                           0.206         9.5

m4 lmer gaussian

lmer_gaussian <- lmer(number_of_root_nodulation ~ treatment + init_height + 
                           (1 |spcode),
                       data = data_nodules_cleaned)
lmer_gaussian_log <-  lmer(log(number_of_root_nodulation) ~ treatment + init_height + 
                           (1 |spcode),
                       data = data_nodules_cleaned)

m5 glmer poisson

glmer_poisson <-  glmer(number_of_root_nodulation ~ treatment + init_height + 
                           (1 |spcode), family = "poisson",
                       data = data_nodules_cleaned)
models_list_nodule_count <- list(lmer_gaussian, lmer_gaussian_log, glmer_poisson)
names(models_list_nodule_count) <- c("lmer_gaussian",
                                     "lmer_gaussian_log",
                                     "glmer_poisson")

Mycorrhizal colonization

I decided not to include it, because I want to focus on Nfixing vrs non-Fixing, 
also I don't trust on the data

Models: Questions 3

\[performance\sim treatment*\ fixer*\ scale(log(trait)\ + initial\ height + random( 1|\ specie)\]

# This funtion takes two columns a create a model formula with all the possible 
# combinations

model_combinations_formulas <- function(y_var, x_var){
           
        variables <- crossing(y_var, x_var)
        pattern <- c('\\+.*|nfixer|treatment|[[:punct:]]| ')
        
           
        # Model
        models <- paste0(variables$y_var,"~treatment*nfixer*",
                            variables$x_var,"+init_height+(1|spcode)")
           
        formulas <- map(models, as.formula)
        
        # Add name to the list 
        names(formulas) <- stringr::str_replace_all(formulas, pattern, replacement = '')
    return(formulas)
}

Scale preditors

data_for_models_transformed_scaled <-         
    data_for_models_transformed %>% 
    
    # test for being sure that I select the traits
    #select(c(4,7,8,13:16))

        # scale only the predictors traits
        mutate(across(c(4,7,8,13:16), scale))
# Select traits (x_vars)
traits_names <- 
    set_names(names(data_for_models_transformed_scaled)
                          [c(7,8,13:16)])
traits_names
      d13c       d15n   amax_log    gs_sqrt    wue_log   pnue_log 
    "d13c"     "d15n" "amax_log"  "gs_sqrt"  "wue_log" "pnue_log" 
# Select plants performance vars (y_vars)
performance_names <- 
    set_names(names(data_for_models_transformed_scaled)[c(5,6,9:12)])

performance_names
                 rgr            rgr_slope   total_biomass_sqrt 
               "rgr"          "rgr_slope" "total_biomass_sqrt" 
  above_biomass_sqrt    below_biomass_log              agr_log 
"above_biomass_sqrt"  "below_biomass_log"            "agr_log" 
models_lmer_formulas <- model_combinations_formulas(performance_names, traits_names)

length(models_lmer_formulas)
[1] 36
models_list_q3 <- map(models_lmer_formulas, 
                       ~ lmer(.x, data = data_for_models_transformed_scaled))

Validation plots

Collinearity

map(models_list_q1_q2, check_collinearity)
$rgr
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.97         1.99      0.25
           nfixer 1.04         1.02      0.96
      init_height 1.09         1.04      0.92
 treatment:nfixer 3.92         1.98      0.26

$rgr_slope
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.97         1.99      0.25
           nfixer 1.05         1.02      0.95
      init_height 1.09         1.04      0.92
 treatment:nfixer 3.93         1.98      0.25

$d13c
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.92         1.98      0.25
           nfixer 1.21         1.10      0.83
      init_height 1.08         1.04      0.93
 treatment:nfixer 4.29         2.07      0.23

$d15n
# Check for Multicollinearity

Low Correlation

        Term  VIF Increased SE Tolerance
   treatment 3.87         1.97      0.26
      nfixer 1.73         1.32      0.58
 init_height 1.07         1.03      0.94

Moderate Correlation

             Term  VIF Increased SE Tolerance
 treatment:nfixer 5.53         2.35      0.18

$total_biomass_sqrt
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.94         1.98      0.25
           nfixer 1.16         1.07      0.87
      init_height 1.08         1.04      0.92
 treatment:nfixer 4.17         2.04      0.24

$above_biomass_sqrt
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.96         1.99      0.25
           nfixer 1.07         1.03      0.94
      init_height 1.09         1.04      0.92
 treatment:nfixer 3.97         1.99      0.25

$below_biomass_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.92         1.98      0.26
           nfixer 1.25         1.12      0.80
      init_height 1.08         1.04      0.93
 treatment:nfixer 4.38         2.09      0.23

$agr_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.97         1.99      0.25
           nfixer 1.05         1.03      0.95
      init_height 1.09         1.04      0.92
 treatment:nfixer 3.94         1.98      0.25

$amax_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.96         1.99      0.25
           nfixer 1.07         1.04      0.93
      init_height 1.09         1.04      0.92
 treatment:nfixer 3.98         2.00      0.25

$gs_sqrt
# Check for Multicollinearity

Low Correlation

        Term  VIF Increased SE Tolerance
   treatment 3.85         1.96      0.26
      nfixer 2.04         1.43      0.49
 init_height 1.06         1.03      0.94

Moderate Correlation

             Term  VIF Increased SE Tolerance
 treatment:nfixer 6.28         2.51      0.16

$wue_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.90         1.97      0.26
           nfixer 1.36         1.17      0.74
      init_height 1.07         1.04      0.93
 treatment:nfixer 4.64         2.15      0.22

$pnue_log
# Check for Multicollinearity

Low Correlation

             Term  VIF Increased SE Tolerance
        treatment 3.90         1.98      0.26
           nfixer 1.33         1.15      0.75
      init_height 1.08         1.04      0.93
 treatment:nfixer 4.58         2.14      0.22
map(models_list_nodule_count, check_collinearity)
$lmer_gaussian
# Check for Multicollinearity

Low Correlation

        Term  VIF Increased SE Tolerance
   treatment 1.06         1.03      0.94
 init_height 1.06         1.03      0.94

$lmer_gaussian_log
# Check for Multicollinearity

Low Correlation

        Term  VIF Increased SE Tolerance
   treatment 1.06         1.03      0.95
 init_height 1.06         1.03      0.95

$glmer_poisson
# Check for Multicollinearity

Low Correlation

        Term  VIF Increased SE Tolerance
   treatment 1.08         1.04      0.93
 init_height 1.08         1.04      0.93
#Warning: Model has interaction terms. VIFs might be inflated. You may check 
#multicollinearity among predictors of a model without interaction terms.

#map(models_list_q3, check_mul)

Bolker’s plots

bolker_validation <- function(model) {
    
    
    a <- plot(model, type = c("p", "smooth"))
    
    ## heteroscedasticity
    b <-     plot(model,sqrt(abs(resid(.))) ~ fitted(.), type = c("p", "smooth"))
   
    cowplot::plot_grid(a,b,nrow = 1)
}

Models for questions 1,2

map(models_list_q1_q2, bolker_validation)
$rgr


$rgr_slope


$d13c


$d15n


$total_biomass_sqrt


$above_biomass_sqrt


$below_biomass_log


$agr_log


$amax_log


$gs_sqrt


$wue_log


$pnue_log

Models for nodule count

map(models_list_nodule_count, bolker_validation)
$lmer_gaussian


$lmer_gaussian_log


$glmer_poisson

Models for question 3

map(models_list_q3, bolker_validation)
$`abovebiomasssqrt~amaxlog`


$`abovebiomasssqrt~d13c`


$`abovebiomasssqrt~d15n`


$`abovebiomasssqrt~gssqrt`


$`abovebiomasssqrt~pnuelog`


$`abovebiomasssqrt~wuelog`


$`agrlog~amaxlog`


$`agrlog~d13c`


$`agrlog~d15n`


$`agrlog~gssqrt`


$`agrlog~pnuelog`


$`agrlog~wuelog`


$`belowbiomasslog~amaxlog`


$`belowbiomasslog~d13c`


$`belowbiomasslog~d15n`


$`belowbiomasslog~gssqrt`


$`belowbiomasslog~pnuelog`


$`belowbiomasslog~wuelog`


$`rgr~amaxlog`


$`rgr~d13c`


$`rgr~d15n`


$`rgr~gssqrt`


$`rgr~pnuelog`


$`rgr~wuelog`


$`rgrslope~amaxlog`


$`rgrslope~d13c`


$`rgrslope~d15n`


$`rgrslope~gssqrt`


$`rgrslope~pnuelog`


$`rgrslope~wuelog`


$`totalbiomasssqrt~amaxlog`


$`totalbiomasssqrt~d13c`


$`totalbiomasssqrt~d15n`


$`totalbiomasssqrt~gssqrt`


$`totalbiomasssqrt~pnuelog`


$`totalbiomasssqrt~wuelog`

Performance package

Models for questions 1,2

map(models_list_q1_q2, check_model)
$rgr


$rgr_slope


$d13c


$d15n


$total_biomass_sqrt


$above_biomass_sqrt


$below_biomass_log


$agr_log


$amax_log


$gs_sqrt


$wue_log


$pnue_log

Models for nodule count

map(models_list_nodule_count, check_model)
$lmer_gaussian


$lmer_gaussian_log


$glmer_poisson

Models for question 3

map(models_list_q3, check_model)
$`abovebiomasssqrt~amaxlog`


$`abovebiomasssqrt~d13c`


$`abovebiomasssqrt~d15n`


$`abovebiomasssqrt~gssqrt`


$`abovebiomasssqrt~pnuelog`


$`abovebiomasssqrt~wuelog`


$`agrlog~amaxlog`


$`agrlog~d13c`


$`agrlog~d15n`


$`agrlog~gssqrt`


$`agrlog~pnuelog`


$`agrlog~wuelog`


$`belowbiomasslog~amaxlog`


$`belowbiomasslog~d13c`


$`belowbiomasslog~d15n`


$`belowbiomasslog~gssqrt`


$`belowbiomasslog~pnuelog`


$`belowbiomasslog~wuelog`


$`rgr~amaxlog`


$`rgr~d13c`


$`rgr~d15n`


$`rgr~gssqrt`


$`rgr~pnuelog`


$`rgr~wuelog`


$`rgrslope~amaxlog`


$`rgrslope~d13c`


$`rgrslope~d15n`


$`rgrslope~gssqrt`


$`rgrslope~pnuelog`


$`rgrslope~wuelog`


$`totalbiomasssqrt~amaxlog`


$`totalbiomasssqrt~d13c`


$`totalbiomasssqrt~d15n`


$`totalbiomasssqrt~gssqrt`


$`totalbiomasssqrt~pnuelog`


$`totalbiomasssqrt~wuelog`

Save lists with the models

saveRDS(models_list_q1_q2, file = "./processed_data/models_q1_q2.RData") 
saveRDS(models_list_q3, file = "./processed_data/models_q3_3_way_interaction.RData") 
saveRDS(models_list_nodule_count, file = "./processed_data/models_list_nodule_count.RData")